# Big Tech experiment

# Load relevant libraries
library(tidyverse)
library(ggthemes)
library(lfe)

# Hex colors for the figures: scale_color_manual() below maps red_mit to the
# "Fox" series and blue_mit to the "MSNBC" series.
blue_mit <- "#315485"
red_mit <- "#A31F34"

##########################
# Data processing
##########################
# Run the wave-merging script; the rest of this file expects it to leave the
# `vid` and `text` data frames in the workspace.
source("Code/merge_waves_tech.R")

# Treat the treatment variable as a factor whose reference level is "placebo",
# so that every treatment coefficient is a contrast against the placebo arm.
vid$treatmentw1 <- relevel(factor(vid$treatmentw1), ref = "placebo")
text$treatmentw1 <- relevel(factor(text$treatmentw1), ref = "placebo")

#############################
# Regressions
#############################

# All eight models share one right-hand side: the wave-1 treatment factor plus
# the wave-1 covariates. It is defined once here so the specifications cannot
# drift apart through copy-paste edits. Term order is kept as in the original
# calls, so coefficient order is unchanged.
tech_model_terms <- c(
  "treatmentw1", "agew1", "manw1", "pid7w1", "ideow1",
  "native.americanw1", "asianw1", "blackw1", "hispanicw1", "whitew1",
  "middle.easternw1"
)

# One formula per outcome (break-up count, first principal component) and wave.
break_w1_formula <- reformulate(tech_model_terms, response = "break_techw1")
break_w2_formula <- reformulate(tech_model_terms, response = "break_techw2")
pc_w1_formula <- reformulate(tech_model_terms, response = "pcw1")
pc_w2_formula <- reformulate(tech_model_terms, response = "pcw2")

# Outcome: break_tech (number of companies broken up), video then text sample.
vid_wave1_mod_break <- felm(break_w1_formula, vid)
vid_wave2_mod_break <- felm(break_w2_formula, vid)
text_wave1_mod_break <- felm(break_w1_formula, text)
text_wave2_mod_break <- felm(break_w2_formula, text)

# Outcome: pc (first principal component), video then text sample.
vid_wave1_mod_pc <- felm(pc_w1_formula, vid)
vid_wave2_mod_pc <- felm(pc_w2_formula, vid)
text_wave1_mod_pc <- felm(pc_w1_formula, text)
text_wave2_mod_pc <- felm(pc_w2_formula, text)

# Collect treatment-effect estimates from a pair of fitted models.
#
# Args:
#   model1, model2: fitted models that support coef() and vcov(); their rows
#     are tagged as wave 1 and wave 2 respectively.
#   treatment_levels: two coefficient names. Rows for the first are labeled
#     "Anti Breakup", rows for the second "Pro Breakup".
#
# Returns:
#   A four-row data frame with the point estimate (`effect`), normal-
#   approximation 95% bounds (`lower`, `upper`), 90% bounds (`lower_90`,
#   `upper_90`), plus `wave` and `valence`.
extract_estimates <- function(model1, model2, treatment_levels = c("treatmentw1anti", "treatmentw1pro")) {
  # Estimate and interval bounds for a single coefficient of a single model.
  summarise_term <- function(fit, term) {
    estimate <- coef(fit)[term]
    std_error <- sqrt(diag(vcov(fit)))[term]
    data.frame(
      effect = estimate,
      lower = estimate + qnorm(0.025) * std_error,
      upper = estimate + qnorm(0.975) * std_error,
      lower_90 = estimate + qnorm(0.05) * std_error,
      upper_90 = estimate + qnorm(0.95) * std_error
    )
  }

  # Row order: both waves for the first level, then both waves for the second.
  fits <- list(model1, model2, model1, model2)
  term_names <- treatment_levels[c(1, 1, 2, 2)]
  rows <- Map(summarise_term, fits, term_names)

  bind_rows(rows) %>%
    mutate(
      wave = c(1L, 2L, 1L, 2L),
      valence = c("Anti Breakup", "Anti Breakup", "Pro Breakup", "Pro Breakup")
    )
}

# One four-row estimate table per treatment mode and outcome.
vid_break_ests <- extract_estimates(vid_wave1_mod_break, vid_wave2_mod_break)
vid_pc_ests <- extract_estimates(vid_wave1_mod_pc, vid_wave2_mod_pc)
text_break_ests <- extract_estimates(text_wave1_mod_break, text_wave2_mod_break)
text_pc_ests <- extract_estimates(text_wave1_mod_pc, text_wave2_mod_pc)

# Stack the two outcomes within each mode, labeling each with its outcome name.
vid_ests <- bind_rows(
  vid_break_ests %>% mutate(outcome = "Number of Companies Broken Up"),
  vid_pc_ests %>% mutate(outcome = "First Principal Component")
)

text_ests <- bind_rows(
  text_break_ests %>% mutate(outcome = "Number of Companies Broken Up"),
  text_pc_ests %>% mutate(outcome = "First Principal Component")
)

# Stack the two modes, then relabel the valence by the outlet it stands for.
vid_and_text_ests <- bind_rows(
  vid_ests %>% mutate(mode = "Treatment Mode: Video"),
  text_ests %>% mutate(mode = "Treatment Mode: Text")
)
vid_and_text_ests <- vid_and_text_ests %>%
  mutate(valence = recode(valence, "Anti Breakup" = "Fox", "Pro Breakup" = "MSNBC"))

#############################
# Figure 4
#############################

# Figure 4: treatment effects on the number of companies broken up, by survey
# wave, with one facet per treatment mode. Thin bars use lower/upper (95%),
# thick bars use lower_90/upper_90.

# Shared horizontal offset so points, lines and both error bars line up.
fig4_dodge <- position_dodge(width = 0.25)

# In-panel series labels; `mode` restricts them to the text-mode facet.
fig4_labels <- data.frame(
  wave = c(1.25, 1.6),
  effect = c(0.4, -0.25),
  label = c("MSNBC rather\nthan Entertainment", "Fox rather\nthan Entertainment"),
  mode = "Treatment Mode: Text",
  valence = c("MSNBC", "Fox")
)

breakup_plot <- vid_and_text_ests %>%
  filter(outcome == "Number of Companies Broken Up") %>%
  ggplot(aes(y = effect, x = wave, color = valence)) +
  geom_hline(yintercept = 0, linetype = 2) +
  geom_point(position = fig4_dodge) +
  geom_line(aes(group = valence), position = fig4_dodge) +
  geom_errorbar(aes(ymin = lower, ymax = upper, width = 0),
                size = 0.5, position = fig4_dodge) +
  geom_errorbar(aes(ymin = lower_90, ymax = upper_90, width = 0),
                size = 1, position = fig4_dodge) +
  geom_text(data = fig4_labels, aes(label = label)) +
  facet_wrap(~ mode, nrow = 1) +
  scale_x_continuous("Survey Wave", breaks = c(1, 2), limits = c(0.5, 2.5)) +
  scale_y_continuous(
    "Effect of Treatment on\nNumber of Companies Broken Up",
    breaks = seq(-0.5, 0.5, 0.25),
    # The padded end labels put a direction hint next to the extreme ticks.
    labels = c("\n\n\n-0.5\n\n(Fewer\ncompanies)", "-0.25", "0.0", "0.25",
               "(More\ncompanies)\n\n0.5\n\n\n"),
    limits = c(-0.5, 0.62)
  ) +
  scale_color_manual("Treatment Valence",
                     breaks = c("Fox", "MSNBC"),
                     values = c(red_mit, blue_mit)) +
  theme_bw() +
  theme(
    legend.position = "none",
    strip.background = element_rect(fill = "lightgrey"),
    axis.title.y = element_text(margin = margin(r = -20))
  )
ggsave("Output/fig4.pdf", plot = breakup_plot, width = 8, height = 4)

#############################
# Figure 5
#############################

# Figure 5: treatment effects on the first principal component, by survey
# wave, with one facet per treatment mode. Thin bars use lower/upper (95%),
# thick bars use lower_90/upper_90.

# Shared horizontal offset so points, lines and both error bars line up.
fig5_dodge <- position_dodge(width = 0.25)

# In-panel series labels; `mode` restricts them to the text-mode facet.
fig5_labels <- data.frame(
  wave = c(1.4, 1.8),
  effect = c(-0.2, 0.2),
  label = c("MSNBC rather\nthan Entertainment", "Fox rather\nthan Entertainment"),
  mode = "Treatment Mode: Text",
  valence = c("MSNBC", "Fox")
)

pc_plot <- vid_and_text_ests %>%
  filter(outcome == "First Principal Component") %>%
  ggplot(aes(y = effect, x = wave, color = valence)) +
  geom_hline(yintercept = 0, linetype = 2) +
  geom_point(position = fig5_dodge) +
  geom_line(aes(group = valence), position = fig5_dodge) +
  geom_errorbar(aes(ymin = lower, ymax = upper, width = 0),
                size = 0.5, position = fig5_dodge) +
  geom_errorbar(aes(ymin = lower_90, ymax = upper_90, width = 0),
                size = 1, position = fig5_dodge) +
  geom_text(data = fig5_labels, aes(label = label)) +
  facet_wrap(~ mode, nrow = 1) +
  scale_x_continuous("Survey Wave", breaks = c(1, 2), limits = c(0.5, 2.5)) +
  scale_y_continuous(
    "Effect of Treatment on\nFirst Principal Component",
    breaks = seq(-0.3, 0.3, 0.1),
    # The padded end labels put a direction hint next to the extreme ticks.
    labels = c("\n\n\n-0.3\n\n(More\nliberal)", "-.2", "-.1", "0.0", ".1", ".2",
               "(More\nconservative)\n\n0.3\n\n\n"),
    limits = c(-0.3, 0.3)
  ) +
  scale_color_manual("Treatment Valence",
                     breaks = c("Fox", "MSNBC"),
                     values = c(red_mit, blue_mit)) +
  theme_bw() +
  theme(
    legend.position = "none",
    strip.background = element_rect(fill = "lightgrey"),
    axis.title.y = element_text(margin = margin(r = -10))
  )
ggsave("Output/fig5.pdf", plot = pc_plot, width = 8, height = 4)

###############################################
# Save estimates
###############################################
# Write a numeric estimate, rounded to three decimals and padded to at least
# three decimal places, to Output/Estimates/<filename>.tex.
#
# Args:
#   estimate: numeric value to export.
#   filename: file name without directory or ".tex" extension.
save_estimate_tex <- function(estimate, filename) {
  target <- file.path("Output", "Estimates", paste0(filename, ".tex"))
  rounded <- round(estimate, 3)
  writeLines(format(rounded, nsmall = 3), target)
}

# Export every row of vid_and_text_ests as three .tex files (point estimate
# and the lower/upper 95% bounds). File names have the form
# <video|text>_<valence>_wave<k>_<outcome>_effect[_lower|_upper].
# seq_len() keeps the loop from running on rows 1 and 0 if the table is empty.
for (i in seq_len(nrow(vid_and_text_ests))) {
  row <- vid_and_text_ests[i, ]
  # `row` is a single row, so a plain if/else replaces the vectorised ifelse().
  mode_prefix <- if (row$mode == "Treatment Mode: Video") "video_" else "text_"
  base_filename <- paste0(
    mode_prefix,
    tolower(row$valence),
    "_wave", row$wave,
    "_", tolower(gsub(" ", "_", row$outcome))
  )
  save_estimate_tex(row$effect, paste0(base_filename, "_effect"))
  save_estimate_tex(row$lower, paste0(base_filename, "_effect_lower"))
  save_estimate_tex(row$upper, paste0(base_filename, "_effect_upper"))
}

# Write the sign-flipped estimate, rounded to three decimals, to
# Output/Estimates/<filename>.tex.
#
# The value is negated, not made absolute: the result is positive only when
# `estimate` is negative.
#
# Args:
#   estimate: numeric value to negate and export.
#   filename: file name without directory or ".tex" extension.
save_positive_coefficient <- function(estimate, filename) {
  positive_estimate <- -(estimate)
  # nsmall = 3 keeps trailing zeros (0.1 -> "0.100"), matching the formatting
  # used by save_estimate_tex() for the other exported estimates.
  formatted_estimate <- format(round(positive_estimate, 3), nsmall = 3)
  writeLines(formatted_estimate, paste0("Output/Estimates/", filename, ".tex"))
}

# Export sign-flipped "Fox" estimates for one treatment mode and outcome.
#
# For waves 1 and 2, writes the negated effect and the negated lower/upper
# bounds via save_positive_coefficient(), using file names of the form
# wave<k>_<treatment_type>_fox_<outcome>_effect[_lower|_upper]_positive.
#
# Args:
#   data: estimate table with columns mode, valence, outcome, wave, effect,
#     lower and upper.
#   treatment_type: mode suffix matched against "Treatment Mode: <type>".
#   outcome_type: value of `outcome` to export.
#
# Stops if a wave does not match exactly one row, instead of silently writing
# an empty or multi-line .tex file.
save_treatment_estimates <- function(data, treatment_type, outcome_type) {
  data_filtered <- data %>%
    filter(mode == paste("Treatment Mode:", treatment_type),
           valence == "Fox",
           outcome == outcome_type)

  for (w in 1:2) {
    wave_data <- data_filtered[which(data_filtered$wave == w), ]
    if (nrow(wave_data) != 1) {
      stop(
        "Expected exactly one Fox estimate for mode '", treatment_type,
        "', outcome '", outcome_type, "', wave ", w,
        "; found ", nrow(wave_data), ".",
        call. = FALSE
      )
    }
    base_filename <- paste0("wave", w, "_", tolower(treatment_type), "_fox_", tolower(gsub(" ", "_", outcome_type)))

    # NOTE(review): negation swaps the roles of the bounds, so the file named
    # *_lower_positive holds -lower, the larger endpoint of the negated
    # interval. Confirm this is what the manuscript expects.
    save_positive_coefficient(wave_data$effect, paste0(base_filename, "_effect_positive"))
    save_positive_coefficient(wave_data$lower, paste0(base_filename, "_effect_lower_positive"))
    save_positive_coefficient(wave_data$upper, paste0(base_filename, "_effect_upper_positive"))
  }
  invisible(NULL)
}

# Export the sign-flipped Fox estimates for the break-up count outcome, once
# per treatment mode (video first, then text).
for (treatment_mode in c("Video", "Text")) {
  save_treatment_estimates(vid_and_text_ests, treatment_mode, "Number of Companies Broken Up")
}



## Figure 5 notes

# Copies of the two experiment data sets, used for the caption statistics below.
ana_text <- text
ana_vid <- vid

# Mean, standard deviation and non-missing count of an outcome among rows
# whose treatment value is "placebo".
#
# Args:
#   data: data frame holding the outcome and treatment columns.
#   y: name of the outcome column (string).
#   treatment_col: name of the treatment column (string).
#
# Returns:
#   Named numeric vector with elements `mean`, `sd` and `N`; missing outcome
#   values are excluded from all three.
ctrl_mean_sd_N_tech <- function(data, y, treatment_col = "treatmentw1") {
  # which() drops rows with a missing treatment value.
  placebo_rows <- which(data[[treatment_col]] == "placebo")
  outcome <- data[[y]][placebo_rows]
  observed <- outcome[!is.na(outcome)]
  c(mean = mean(observed), sd = sd(observed), N = length(observed))
}

# Placebo-arm mean, SD and N of the first principal component, for each
# treatment mode (video, text) and wave.
vid_pc_w1 <- ctrl_mean_sd_N_tech(ana_vid, "pcw1")
vid_pc_w2 <- ctrl_mean_sd_N_tech(ana_vid, "pcw2")
text_pc_w1 <- ctrl_mean_sd_N_tech(ana_text, "pcw1")
text_pc_w2 <- ctrl_mean_sd_N_tech(ana_text, "pcw2")

# Number formatters: two decimals for means and SDs, rounded comma-grouped
# values for Ns.
fmt <- function(x) sprintf("%.2f", x)
fmt0 <- function(x) format(round(x), big.mark = ",")

# Render one "<wave label>: mean [sd], N=n" fragment of the caption.
fmt_wave_stats <- function(wave_label, stats) {
  paste0(
    wave_label, ": ", fmt(stats["mean"]), " [", fmt(stats["sd"]), "], N=",
    fmt0(stats["N"])
  )
}

caption_pc_fig5 <- paste0(
  "Control (Entertainment) means [SD], N — ",
  "Video condition: ", fmt_wave_stats("Wave 1", vid_pc_w1), "; ",
  fmt_wave_stats("Wave 2", vid_pc_w2), ". ",
  "Text condition: ", fmt_wave_stats("Wave 1", text_pc_w1), "; ",
  fmt_wave_stats("Wave 2", text_pc_w2), ". "
)

# Print the assembled note to the console.
cat("\n[Figure 5 — First PC note]\n", caption_pc_fig5, "\n", sep = "")



###############################################
# Appendix C - Subgroup Effects by Media Preference in Experiments 2 and 3
###############################################

# Collect treatment-effect estimates from a pair of subgroup models, tolerating
# models in which a treatment coefficient is absent.
#
# Args:
#   model1, model2: fitted models that support coef() and vcov(); their rows
#     are tagged as wave 1 and wave 2 respectively. If either is NULL the
#     function returns NULL.
#   treatment_levels: two coefficient names. Rows for the first are labeled
#     "Anti Breakup", rows for the second "Pro Breakup".
#
# Returns:
#   NULL, or a four-row data frame with `effect`, normal-approximation 95%
#   bounds (`lower`, `upper`), 90% bounds (`lower_90`, `upper_90`), `wave` and
#   `valence`. Coefficients missing from a model yield a row of numeric NAs.
extract_estimates_subgroup <- function(model1, model2, treatment_levels = c("treatmentw1anti", "treatmentw1pro")) {
  if (is.null(model1) || is.null(model2)) {
    return(NULL)
  }

  extract_single <- function(model, treatment) {
    if (!(treatment %in% names(coef(model)))) {
      # NA_real_ (not logical NA) keeps every column numeric even when all
      # four rows are placeholders.
      return(data.frame(
        effect = NA_real_,
        lower = NA_real_,
        upper = NA_real_,
        lower_90 = NA_real_,
        upper_90 = NA_real_
      ))
    }
    coef_val <- coef(model)[treatment]
    se <- sqrt(diag(vcov(model)))[treatment]
    data.frame(
      effect = coef_val,
      lower = coef_val + qnorm(0.025) * se,
      upper = coef_val + qnorm(0.975) * se,
      lower_90 = coef_val + qnorm(0.05) * se,
      upper_90 = coef_val + qnorm(0.95) * se
    )
  }

  # Row order: both waves for the first level, then both waves for the second.
  bind_rows(
    extract_single(model1, treatment_levels[1]),
    extract_single(model2, treatment_levels[1]),
    extract_single(model1, treatment_levels[2]),
    extract_single(model2, treatment_levels[2])
  ) %>%
    mutate(
      wave = rep(1:2, 2),
      valence = rep(c("Anti Breakup", "Pro Breakup"), each = 2)
    )
}

# Split each experiment by wave-1 media preference. `%in%` never returns NA,
# so respondents with a missing preference are left out of every subset.
text_fox <- text[text$med_prefw1 %in% "Fox News", ]
text_msnbc <- text[text$med_prefw1 %in% "MSNBC", ]
text_ent <- text[text$med_prefw1 %in% "Food Network", ]

# Report the text-experiment subgroup sizes.
print(paste("Text subgroup sizes - Fox News:", nrow(text_fox), "MSNBC:", nrow(text_msnbc), "Food Network:", nrow(text_ent)))

vid_fox <- vid[vid$med_prefw1 %in% "Fox News", ]
vid_msnbc <- vid[vid$med_prefw1 %in% "MSNBC", ]
vid_ent <- vid[vid$med_prefw1 %in% "Food Network", ]

# Report the video-experiment subgroup sizes.
cat(
  "Video subgroup sizes — Fox:", nrow(vid_fox),
  "MSNBC:", nrow(vid_msnbc),
  "Food Network:", nrow(vid_ent),
  "\n"
)


# First-principal-component models within each media-preference subgroup.
# Each regresses the wave-specific outcome on the treatment factor only.

# Text experiment: Fox News, MSNBC and Food Network subgroups.
text_wave1_pc_fox <- lm(pcw1 ~ treatmentw1, data = text_fox)
text_wave2_pc_fox <- lm(pcw2 ~ treatmentw1, data = text_fox)
text_wave1_pc_msnbc <- lm(pcw1 ~ treatmentw1, data = text_msnbc)
text_wave2_pc_msnbc <- lm(pcw2 ~ treatmentw1, data = text_msnbc)
text_wave1_pc_ent <- lm(pcw1 ~ treatmentw1, data = text_ent)
text_wave2_pc_ent <- lm(pcw2 ~ treatmentw1, data = text_ent)

# Video experiment: Fox News, MSNBC and Food Network subgroups.
vid_wave1_pc_fox <- lm(pcw1 ~ treatmentw1, data = vid_fox)
vid_wave2_pc_fox <- lm(pcw2 ~ treatmentw1, data = vid_fox)
vid_wave1_pc_msnbc <- lm(pcw1 ~ treatmentw1, data = vid_msnbc)
vid_wave2_pc_msnbc <- lm(pcw2 ~ treatmentw1, data = vid_msnbc)
vid_wave1_pc_ent <- lm(pcw1 ~ treatmentw1, data = vid_ent)
vid_wave2_pc_ent <- lm(pcw2 ~ treatmentw1, data = vid_ent)

# Estimate tables for every mode-by-preference subgroup.
text_pc_ests_fox <- extract_estimates_subgroup(text_wave1_pc_fox, text_wave2_pc_fox)
text_pc_ests_msnbc <- extract_estimates_subgroup(text_wave1_pc_msnbc, text_wave2_pc_msnbc)
text_pc_ests_ent <- extract_estimates_subgroup(text_wave1_pc_ent, text_wave2_pc_ent)

vid_pc_ests_fox <- extract_estimates_subgroup(vid_wave1_pc_fox, vid_wave2_pc_fox)
vid_pc_ests_msnbc <- extract_estimates_subgroup(vid_wave1_pc_msnbc, vid_wave2_pc_msnbc)
vid_pc_ests_ent <- extract_estimates_subgroup(vid_wave1_pc_ent, vid_wave2_pc_ent)

# Attach the facet label to a subgroup table; a NULL table stays NULL so that
# bind_rows() simply skips it.
tag_media_preference <- function(ests, preference) {
  if (is.null(ests)) {
    return(NULL)
  }
  mutate(ests, `Media Preference` = preference)
}

text_pc_subgroup <- bind_rows(
  tag_media_preference(text_pc_ests_fox, "Stated Preference: Fox"),
  tag_media_preference(text_pc_ests_msnbc, "Stated Preference: MSNBC"),
  tag_media_preference(text_pc_ests_ent, "Stated Preference: Entertainment")
)

vid_pc_subgroup <- bind_rows(
  tag_media_preference(vid_pc_ests_fox, "Stated Preference: Fox"),
  tag_media_preference(vid_pc_ests_msnbc, "Stated Preference: MSNBC"),
  tag_media_preference(vid_pc_ests_ent, "Stated Preference: Entertainment")
)

# Stack both modes, relabel valence by outlet, and order the preference facets
# MSNBC, Entertainment, Fox.
pc_subgroup_all <- bind_rows(
  text_pc_subgroup %>% mutate(mode = "Treatment Mode: Text"),
  vid_pc_subgroup %>% mutate(mode = "Treatment Mode: Video")
)
pc_subgroup_all <- pc_subgroup_all %>%
  mutate(
    valence = recode(valence, "Anti Breakup" = "Fox", "Pro Breakup" = "MSNBC"),
    `Media Preference` = factor(
      `Media Preference`,
      levels = c(
        "Stated Preference: MSNBC",
        "Stated Preference: Entertainment",
        "Stated Preference: Fox"
      ),
      ordered = TRUE
    )
  )

# Appendix figure: first-principal-component effects by stated media preference
# (facet rows) and treatment mode (facet columns). Rows with a missing
# estimate are left out, and nothing is drawn if none remain.
pc_subgroup_plot_data <- filter(pc_subgroup_all, !is.na(effect))

if (nrow(pc_subgroup_plot_data) > 0) {
  # In-panel series labels, placed in the MSNBC-preference / text-mode panel.
  # The factor carries the same ordered levels as the plotted data.
  labs_pc <- data.frame(
    wave = c(1.42, 1.8),
    effect = c(0.6, -0.3),
    label = c("Fox rather\nthan Entertainment", "MSNBC rather\nthan Entertainment"),
    valence = c("Fox", "MSNBC"),
    `Media Preference` = factor(
      rep("Stated Preference: MSNBC", 2),
      levels = c(
        "Stated Preference: MSNBC",
        "Stated Preference: Entertainment",
        "Stated Preference: Fox"
      ),
      ordered = TRUE
    ),
    mode = "Treatment Mode: Text",
    stringsAsFactors = FALSE,
    check.names = FALSE
  )

  # Shared horizontal offset so points, lines and both error bars line up.
  subgroup_dodge <- position_dodge(width = 0.25)

  pc_subgroup_plot <- ggplot(pc_subgroup_plot_data,
                             aes(y = effect, x = wave, color = valence)) +
    geom_hline(yintercept = 0, linetype = 2) +
    geom_point(position = subgroup_dodge) +
    geom_line(aes(group = valence), position = subgroup_dodge) +
    geom_errorbar(aes(ymin = lower, ymax = upper, width = 0),
                  size = 0.5, position = subgroup_dodge) +
    geom_errorbar(aes(ymin = lower_90, ymax = upper_90, width = 0),
                  size = 1, position = subgroup_dodge) +
    geom_text(data = labs_pc, aes(label = label)) +
    facet_grid(`Media Preference` ~ mode) +
    scale_x_continuous("Survey Wave", breaks = c(1, 2), limits = c(0.5, 2.5)) +
    scale_y_continuous(
      "Effect of Treatment on\nFirst Principal Component",
      breaks = seq(-0.6, 0.6, 0.3),
      # The padded end labels put a direction hint next to the extreme ticks.
      labels = c("\n\n\n-0.6\n\n(More\nliberal)", "-0.3", "0.0", "0.3",
                 "(More\nconservative)\n\n0.6\n\n\n"),
      limits = c(-0.72, 0.72)
    ) +
    scale_color_manual("Treatment Valence",
                       breaks = c("Fox", "MSNBC"),
                       values = c(red_mit, blue_mit)) +
    theme_bw() +
    theme(
      legend.position = "none",
      strip.background = element_rect(fill = "lightgrey"),
      axis.title.y = element_text(margin = margin(r = 0))
    )
  ggsave("Output/figA2_pc_subgroup.pdf", plot = pc_subgroup_plot, width = 8, height = 12)
}
